For the reader: You can click the black “code” box on the right hand side to either see or hide the R code behind analysis results and graphs.
The results are arranged into separate tabs by site/area, so that it’s easy to look at just the results for the site of interest instead of accidentally scrolling down into a different site and getting confused.
The results for each site consist of four parts: pXRF, particle size distribution, loss on ignition, and colorimetry.
Intro about the site
From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:
Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)
The “bad” elements were excluded from the pXRF results from the get-go. After averaging and scaling the remaining values we ended up with two analysis datasets: “pxrf_all”, which contains every element with at least one non-NA result, and “pxrf_final”, in which the dubious elements and the elements with too many NA results are also omitted.
# Libraries
library(openxlsx);
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(dendextend); # Dendrograms
# Read the raw data
pxrf <- read.xlsx("data/pxrf_AY.xlsx", sep=";")
# Drop measurement rows flagged as 'TEST' or 'ERROR' in the Sample column
pxrf <- pxrf %>%
  filter(!grepl('TEST', Sample)) %>%
  filter(!grepl('ERROR', Sample))
# Drop the instrument metadata columns that play no part in the analysis
pxrf <- pxrf %>%
  select(-c(Site, `Other;name`, `File;#`, DateTime,
            Application, Method, ElapsedTime, `Cal;Check`))
# Coerce "< LOD" readings to NA by converting character columns to numeric
pxrf[4:93] <- pxrf[4:93] %>% mutate_if(is.character, as.numeric)
# Average the repeated measurements per "Sample", "Area" and "Type"
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)
# aggregate() duplicated the grouping columns as Group.1-3; drop the originals
pxrf_averaged <- pxrf_averaged %>% select(-c(Sample, Area, Type))
# Sample names (Group.1) become the row names; the other two grouping
# columns get their meaningful names back
rownames(pxrf_averaged) <- pxrf_averaged$Group.1
pxrf_averaged <- pxrf_averaged %>%
  select(-Group.1) %>%
  rename(Area = Group.2, Type = Group.3)
# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))
# Scaling the element columns with the standard z-score method
pxrf_scaled[3:47] <- scale(pxrf_scaled[3:47])
# Keep only columns that contain at least one non-NA value.
# (Renamed from `all_na`, which said the opposite of what the predicate tests.)
has_any_value <- function(x) any(!is.na(x))
pxrf_all <- pxrf_scaled %>% select_if(has_any_value)
pxrf_averaged: All averaged data including averaged errors (Uine’s note: for later use)
# Inspect the averaged data: each element plus its instrument ";Err" column
colnames(pxrf_averaged)
## [1] "Area" "Type" "MgO" "MgO;Err" "Al2O3" "Al2O3;Err"
## [7] "SiO2" "SiO2;Err" "P2O5" "P2O5;Err" "S" "S;Err"
## [13] "Cl" "Cl;Err" "K2O" "K2O;Err" "CaO" "CaO;Err"
## [19] "Ti" "Ti;Err" "V" "V;Err" "Cr" "Cr;Err"
## [25] "Mn" "Mn;Err" "Fe" "Fe;Err" "Co" "Co;Err"
## [31] "Ni" "Ni;Err" "Cu" "Cu;Err" "Zn" "Zn;Err"
## [37] "As" "As;Err" "Se" "Se;Err" "Rb" "Rb;Err"
## [43] "Sr" "Sr;Err" "Y" "Y;Err" "Zr" "Zr;Err"
## [49] "Nb" "Nb;Err" "Mo" "Mo;Err" "Rh" "Rh;Err"
## [55] "Pd" "Pd;Err" "Ag" "Ag;Err" "Cd" "Cd;Err"
## [61] "Sn" "Sn;Err" "Sb" "Sb;Err" "Ba" "Ba;Err"
## [67] "La" "La;Err" "Ce" "Ce;Err" "Hf" "Hf;Err"
## [73] "Ta" "Ta;Err" "W" "W;Err" "Pt" "Pt;Err"
## [79] "Au" "Au;Err" "Hg" "Hg;Err" "Tl" "Tl;Err"
## [85] "Pb" "Pb;Err" "Bi" "Bi;Err" "Th" "Th;Err"
## [91] "U" "U;Err"
pxrf_all: All elements (the numbers below are the number of NA values per column)
# Count NA values per column of pxrf_all (scaled, error columns removed)
colSums(is.na(pxrf_all))
## Area Type MgO Al2O3 SiO2 P2O5 S Cl K2O CaO Ti V Mn
## 0 0 1 0 0 0 43 1 0 0 0 28 0
## Fe Ni Cu Zn As Rb Sr Y Zr Nb Ba
## 0 1 0 0 27 0 0 2 0 31 45
pxrf_final: Final analysis data set with only the selected elements
# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>%
  select(-c(Al2O3, CaO, SiO2, Cl, K2O, Ni, P2O5, S, V, As, Nb, Ba))
# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0
# Final analysis data set
# (two metadata columns, Area and Type, followed by the ten selected elements)
colnames(pxrf_final)
## [1] "Area" "Type" "MgO" "Ti" "Mn" "Fe" "Cu" "Zn" "Rb" "Sr"
## [11] "Y" "Zr"
# PC autoplot with K-means clusters
# Seed fixed so the kmeans starting centroids are reproducible.
# NOTE(review): k = 7 appears to be a manual choice — confirm against a scree/elbow check
set.seed(1)
autoplot(kmeans(pxrf_final[3:12], 7), data = pxrf_final, label = TRUE, label.size = 3)
PCA with final elements:
# Elements
colnames(pxrf_final[3:12])
## [1] "MgO" "Ti" "Mn" "Fe" "Cu" "Zn" "Rb" "Sr" "Y" "Zr"
# PCA analysis
# Data were already z-scaled above, so the prcomp defaults (center = TRUE,
# scale. = FALSE) are appropriate here
pca_1 <- prcomp(pxrf_final[3:12])
summary(pca_1)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.3836 1.2233 1.0086 0.84957 0.60483 0.49903 0.44478
## Proportion of Variance 0.5719 0.1506 0.1024 0.07265 0.03682 0.02507 0.01991
## Cumulative Proportion 0.5719 0.7225 0.8249 0.89752 0.93434 0.95941 0.97932
## PC8 PC9 PC10
## Standard deviation 0.3538 0.22250 0.17534
## Proportion of Variance 0.0126 0.00498 0.00309
## Cumulative Proportion 0.9919 0.99691 1.00000
# PCA plots
# Base-graphics biplot of PC1/PC2 (samples in grey, element loadings in pink)
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Ashdod-Yam elements")
# ggplot score plots, labelled with sample names and coloured by metadata
autoplot(pca_1, data=pxrf_final, colour='Area', shape = FALSE, label = TRUE, main = "PCA Ashdod-Yam grouped by area")
autoplot(pca_1, data=pxrf_final, colour='Type', shape = FALSE, label = TRUE, main = "PCA Ashdod-Yam grouped by sample type")
# FactoMineR PCA (also draws its default graphs and prints the result index below)
PCA(pxrf_final[3:12])
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 47 individuals, described by 10 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
# HCA dendrogram, samples color coded by area:
# NOTE(review): dist() is applied to the whole pxrf_final, including the character
# columns Area and Type — confirm these are meant to enter the distance matrix
# (pxrf_final[3:12] would restrict it to the element values)
dend <-
pxrf_final %>% # data
dist %>% # calculate a distance matrix
hclust(method = "ward.D2") %>% # hierarchical clustering
as.dendrogram %>% # turn the object into a dendrogram
color_branches(k=7)
# Colour the sample labels by excavation area (column 1 of pxrf_final)
area <- as.factor(pxrf_final[, 1])
n_area <- length(unique(area))
cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l = 50)
col_area <- cols_a[area]
# labels come back in dendrogram order, hence the reordering
labels_colors(dend) <- col_area[order.dendrogram(dend)]
plot(dend, main="HCA with sample areas")
rect.dendrogram(dend, 7, border = "Black")
legend("topright", legend = levels(area), fill = cols_a)
# HCA dendrogram, samples color coded by type:
dend2 <-
pxrf_final %>% # data
dist %>% # calculate a distance matrix
hclust(method = "ward.D2") %>% # hierarchical clustering
as.dendrogram %>% # turn the object into a dendrogram
color_branches(k=7)
# Colour the sample labels by sample type (column 2 of pxrf_final)
type <- as.factor(pxrf_final[, 2])
n_type <- length(unique(type))
cols_t <- colorspace::rainbow_hcl(n_type, c = 70, l = 50)
col_type <- cols_t[type]
labels_colors(dend2) <- col_type[order.dendrogram(dend2)]
plot(dend2, main="HCA with sample types")
# Fix: draw the cluster rectangles on dend2 (the dendrogram just plotted),
# not on dend from the previous chunk
rect.dendrogram(dend2, 7, border = "Black")
legend("topright", legend = levels(type), fill = cols_t)
# Libraries
library(dplyr);
library(openxlsx);
library(ggtern)
# Read and filter data
grain <- read.xlsx("data/grain_AY.xlsx", sep=";")
# Drop the sheet's auto-generated 'Average' rows (we average ourselves below,
# across multiple runs of the same sample) and any test runs
grain <- grain %>%
  filter(!grepl('Average', Sample), !grepl('test', Sample))
# Average the runs per "Sample" / "Context" / "Type"
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)
# aggregate() duplicated the grouping columns as Group.1-3; drop the originals
grain <- grain %>% select(-c(Sample, Context, Type))
# Sample names become row names; give the remaining group columns their names back
rownames(grain) <- grain$Group.1
grain <- grain %>%
  select(-Group.1) %>%
  rename(Context = Group.2, Type = Group.3)
# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
# Min-max normalize a numeric vector to [0, 1].
#   x     numeric vector
#   na.rm forwarded to min()/max(); TRUE (default) ignores NAs in the range.
# Bug fix: na.rm was previously accepted but ignored (TRUE was hard-coded).
normalize <- function(x, na.rm = TRUE) {
  lo <- min(x, na.rm = na.rm)
  hi <- max(x, na.rm = na.rm)
  (x - lo) / (hi - lo)
}
# Apply function "normalize" to clay, silt and sand columns
# (apply() over columns returns a matrix; wrap it back into a data frame)
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))
# Ternary plots
# Labelled version coloured by context (points shrunk to 0 so only labels show)
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
labs(title="Clay silt sand") +
theme_rgbw() +
theme_nomask() +
geom_point(size=0) +
geom_text(aes(label=rownames(grain_01)), size=3)
# Point-only version, coloured by context
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
labs(title="Clay silt sand") +
theme_rgbw() +
theme_nomask() +
geom_point(size=2)
# Point-only version, coloured by sample type
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Type)) +
labs(title="Clay silt sand") +
theme_nomask() +
geom_mask() +
geom_point(size=2)
We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.
# Libraries
library(openxlsx);
library(dplyr);
library(ggplot2);
library(ggrepel);
library(tidyr);
library(reshape2) # Melt-function for reshaping data to long form
# Read the manual loss-on-ignition table and the thermogravimeter table
loi <- read.xlsx("data/loi_AY.xlsx", sep=";")
tga <- read.xlsx("data/tga_AY.xlsx", sep=";")
# Quick overview of the raw manual LOI table
summary(loi)
## sample crucible wet_weight sample_wet
## Length:52 Min. :8.079 Min. :10.13 Min. :1.538
## Class :character 1st Qu.:8.566 1st Qu.:11.00 1st Qu.:2.464
## Mode :character Median :9.006 Median :11.71 Median :2.635
## Mean :8.994 Mean :11.55 Mean :2.559
## 3rd Qu.:9.435 3rd Qu.:12.05 3rd Qu.:2.824
## Max. :9.925 Max. :12.68 Max. :3.043
## dry_weight after_550_C after_950_C context
## Min. :10.11 Min. :10.08 Min. :10.05 Length:52
## 1st Qu.:10.97 1st Qu.:10.94 1st Qu.:10.86 Class :character
## Median :11.67 Median :11.62 Median :11.53 Mode :character
## Mean :11.52 Mean :11.47 Mean :11.41
## 3rd Qu.:12.00 3rd Qu.:11.96 3rd Qu.:11.93
## Max. :12.65 Max. :12.60 Max. :12.52
## type
## Length:52
## Class :character
## Mode :character
##
##
##
# Quick overview of the raw TGA table (note: the mass columns arrive as character)
summary(tga)
## Name Analysis;Date Batch Wet
## Length:53 Length:53 Length:53 Length:53
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## Dry Mass_550 Mass_950 context
## Length:53 Length:53 Length:53 Length:53
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## type
## Length:53
## Class :character
## Mode :character
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))
# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)
# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name
# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)
# LOI percentages: mass lost at 550 C relative to dry mass, and mass lost
# between 550 C and 950 C relative to the 550 C mass
loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100
# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100
# Colored boxplots in same graph
# Keep context plus the two LOI percentage columns, then reshape to long form
loi_long <- loi[c(8,13:14)]
loi_long <- melt(loi_long, id = "context") # Reshaping data to long form
ggplot(loi_long, aes(x = variable, y = value, color = context)) +
geom_boxplot() +
labs(title="Traditional LOI by temperature",
x ="Temperature", y = "LOI")
ggplot(loi_long, aes(x = context, y = value, color = variable)) +
geom_boxplot() +
labs(title="Traditional LOI by context",
x ="Context", y = "LOI")
# boxplot.stats returns (in $stats) the lower whisker, the lower hinge (~Q1),
# the median, the upper hinge (~Q3) and the upper whisker; $out lists the
# values beyond the whiskers (outliers).
boxplot.stats(loi$c950)
## $stats
## [1] 0.398150 1.528018 2.198513 3.162314 4.889189
##
## $n
## [1] 52
##
## $conf
## [1] 1.840428 2.556598
##
## $out
## [1] 8.041755 11.960444
# The two high carbonate outliers are both AY-53 (two different measurements to make sure the first batch wasn't misleading)
# Removing duplicates from the next graph
rownames(loi)
## [1] "AY-1" "AY-2" "AY-3" "AY-4" "AY-5" "AY-6" "AY-7"
## [8] "AY-8" "AY-9" "AY-10" "AY-11" "AY-12" "AY-13" "AY-14"
## [15] "AY-15" "AY-16" "AY-17" "AY-18" "AY-19" "AY-20" "AY-21"
## [22] "AY-22" "AY-23" "AY-24" "AY-25" "AY-26" "AY-27" "AY-28"
## [29] "AY-29" "AY-30" "AY-31" "AY-32" "AY-33" "AY-34" "AY-35"
## [36] "AY-36" "AY-37" "AY-38" "AY-39" "AY-40" "AY-41" "AY-42"
## [43] "AY-50" "AY-51" "AY-52" "AY-53" "AY-54" "AY-50_2" "AY-51_2"
## [50] "AY-52_2" "AY-53_2" "AY-54_2"
# Keep only the first run of each sample: the re-measured duplicates are the
# rownames ending in "_2" (AY-50_2 ... AY-54_2, rows 48-52 above).
# Selecting by name pattern is safer than the previous hard-coded loi[1:47, ],
# and the redundant subset() wrapper is gone.
loi <- loi[!grepl("_2$", rownames(loi)), ]
# Organic vs. carbonate scatter, coloured by context, shaped by type, labelled
ggplot(loi,
       aes(c550, c950, label = rownames(loi), colour = factor(context))) +
  geom_point(size = 2, aes(shape = factor(type))) +
  geom_text_repel(size = 3) +
  labs(title = "Organic vs. carbonate content",
       x = 'Organic LOI (%)', y = 'Carbonate LOI (%)') +
  theme(axis.title = element_text())
# Same scatter without labels, shape = type, colour = context
ggplot(loi, aes(c550, c950)) +
  geom_point(size = 2, aes(shape = factor(type), colour = factor(context))) +
  labs(title = "Organic vs. carbonate content",
       x = 'Organic LOI (%)', y = 'Carbonate LOI (%)') +
  theme(axis.title = element_text())
# Stacked barplot of the two LOI fractions, samples ordered by value
loi_long <- gather(loi, key = var, value = value, c550, c950)
ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# Colored boxplots in same graph
# Keep context plus the two TGA LOI percentage columns, then reshape to long form
tga_long <- tga[c(8,10:11)]
tga_long <- melt(tga_long, id = "context") # Reshaping data to long form
ggplot(tga_long, aes(x = variable, y = value, color = context)) +
geom_boxplot() +
labs(title="Thermogravimeter LOI by temperature",
x ="Temperature", y = "LOI")
ggplot(tga_long, aes(x = context, y = value, color = variable)) +
geom_boxplot() +
labs(title="Thermogravimeter LOI by context",
x ="Context", y = "LOI")
# Color by context biplot
ggplot(tga,
aes(c550, c950, label = rownames(tga), colour = factor(context))) +
geom_point(size=2) +
geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
xlab('Organic LOI (%)') +
ylab('Carbonate LOI (%)') +
theme(axis.title = element_text())
# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga, key = var, value = value, c550, c950)
ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
geom_bar(stat = 'identity', position = 'stack') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# Libraries
library(dplyr);
library(ggplot2);
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)
# Read colorimeter output (tab-separated, decimal comma)
color1 <- read.table("data/color_AY.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
# Keep only the CIELAB columns (L, a, b)
color <- subset(color1[, 3:5])
summary(color)
## L a b
## Min. :37.56 Min. : 7.97 Min. :16.81
## 1st Qu.:42.13 1st Qu.: 9.72 1st Qu.:19.76
## Median :43.64 Median :10.38 Median :21.18
## Mean :43.92 Mean :10.49 Mean :21.22
## 3rd Qu.:45.91 3rd Qu.:11.29 3rd Qu.:22.34
## Max. :48.24 Max. :13.53 Max. :25.03
# a* against b*
ggplot(color, aes(a, b, label = rownames(color))) +
  geom_point(size = 2) +
  geom_text_repel(size = 3) +
  labs(title = "Color (a*b)", x = "a D65", y = "b D65")
# Luminance by sample number (X column of the raw table)
ggplot(color1, aes(X, L, label = rownames(color))) +
  geom_point(size = 2) +
  geom_text_repel(size = 3) +
  labs(title = "Luminance", x = "Sample number", y = "L* (D65)")
# a* against luminance
ggplot(color, aes(a, L, label = rownames(color))) +
  geom_point(size = 2) +
  geom_text_repel(size = 3) +
  labs(title = "A versus luminance", x = "a", y = "L* (D65)")
# b* against luminance
ggplot(color, aes(b, L, label = rownames(color))) +
  geom_point(size = 2) +
  geom_text_repel(size = 3) +
  labs(title = "B versus luminance", x = "b", y = "L* (D65)")
# Colorful 3D plot with drop lines
# a*/b*/L* as x/y/z, with vertical drop lines (type = "h") and sample labels
scatter3D(color$a, color$b, color$L,
xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$a, color$b, color$L, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)
# Same data with L* on the x axis for an alternative viewing angle
scatter3D(color$L, color$a, color$b,
xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$L, color$a, color$b, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)
Intro about the site
From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:
Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)
The “bad” elements were excluded from the pXRF results from the get-go. After averaging and scaling the remaining values we ended up with two analysis datasets: “pxrf_all”, which contains every element with at least one non-NA result, and “pxrf_final”, in which the dubious elements and the elements with too many NA results are also omitted.
# Libraries
library(openxlsx);
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(dendextend); # Dendrograms
# Read the raw data
pxrf <- read.xlsx("data/pxrf_AYB.xlsx", sep=";")
# Drop measurement rows flagged as 'TEST' or 'ERROR' in the Sample column
pxrf <- pxrf %>%
  filter(!grepl('TEST', Sample)) %>%
  filter(!grepl('ERROR', Sample))
# Drop the instrument metadata columns that play no part in the analysis
pxrf <- pxrf %>%
  select(-c(Site, `Other;name`, `File;#`, DateTime,
            Application, Method, ElapsedTime, `Cal;Check`))
# Coerce "< LOD" readings to NA by converting character columns to numeric
pxrf[4:93] <- pxrf[4:93] %>% mutate_if(is.character, as.numeric)
# Average the repeated measurements per "Sample", "Area" and "Type"
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)
# aggregate() duplicated the grouping columns as Group.1-3; drop the originals
pxrf_averaged <- pxrf_averaged %>% select(-c(Sample, Area, Type))
# Sample names (Group.1) become the row names; the other two grouping
# columns get their meaningful names back
rownames(pxrf_averaged) <- pxrf_averaged$Group.1
pxrf_averaged <- pxrf_averaged %>%
  select(-Group.1) %>%
  rename(Area = Group.2, Type = Group.3)
# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))
# Scaling the element columns with the standard z-score method
pxrf_scaled[3:47] <- scale(pxrf_scaled[3:47])
# Keep only columns that contain at least one non-NA value.
# (Renamed from `all_na`, which said the opposite of what the predicate tests.)
has_any_value <- function(x) any(!is.na(x))
pxrf_all <- pxrf_scaled %>% select_if(has_any_value)
pxrf_averaged: All averaged data including averaged errors (Uine’s note: for later use)
# Inspect the averaged data: each element plus its instrument ";Err" column
colnames(pxrf_averaged)
## [1] "Area" "Type" "MgO" "MgO;Err" "Al2O3" "Al2O3;Err"
## [7] "SiO2" "SiO2;Err" "P2O5" "P2O5;Err" "S" "S;Err"
## [13] "Cl" "Cl;Err" "K2O" "K2O;Err" "CaO" "CaO;Err"
## [19] "Ti" "Ti;Err" "V" "V;Err" "Cr" "Cr;Err"
## [25] "Mn" "Mn;Err" "Fe" "Fe;Err" "Co" "Co;Err"
## [31] "Ni" "Ni;Err" "Cu" "Cu;Err" "Zn" "Zn;Err"
## [37] "As" "As;Err" "Se" "Se;Err" "Rb" "Rb;Err"
## [43] "Sr" "Sr;Err" "Y" "Y;Err" "Zr" "Zr;Err"
## [49] "Nb" "Nb;Err" "Mo" "Mo;Err" "Rh" "Rh;Err"
## [55] "Pd" "Pd;Err" "Ag" "Ag;Err" "Cd" "Cd;Err"
## [61] "Sn" "Sn;Err" "Sb" "Sb;Err" "Ba" "Ba;Err"
## [67] "La" "La;Err" "Ce" "Ce;Err" "Hf" "Hf;Err"
## [73] "Ta" "Ta;Err" "W" "W;Err" "Pt" "Pt;Err"
## [79] "Au" "Au;Err" "Hg" "Hg;Err" "Tl" "Tl;Err"
## [85] "Pb" "Pb;Err" "Bi" "Bi;Err" "Th" "Th;Err"
## [91] "U" "U;Err"
pxrf_all: All elements (the numbers below are the number of NA values per column)
# Count NA values per column of pxrf_all (scaled, error columns removed)
colSums(is.na(pxrf_all))
## Area Type MgO Al2O3 SiO2 P2O5 S Cl K2O CaO Ti Mn Fe
## 0 0 1 0 0 0 1 0 2 0 0 0 0
## Ni Cu Zn As Rb Sr Y Zr Pb
## 0 0 0 1 1 0 2 0 3
pxrf_final: Final analysis data set with only the selected elements
# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>%
  select(-c(Al2O3, CaO, SiO2, Cl, K2O, Ni, P2O5, S, As, Pb))
# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0
# Final analysis data set
# (two metadata columns, Area and Type, followed by the ten selected elements)
colnames(pxrf_final)
## [1] "Area" "Type" "MgO" "Ti" "Mn" "Fe" "Cu" "Zn" "Rb" "Sr"
## [11] "Y" "Zr"
# PC autoplot with K-means clusters
# Seed fixed so the kmeans starting centroids are reproducible; k = 4 here
# (the Byzantine subset has far fewer samples than the main AY set)
set.seed(1)
autoplot(kmeans(pxrf_final[3:12], 4), data = pxrf_final, label = TRUE, label.size = 3)
PCA with final elements:
# Elements
colnames(pxrf_final[3:12])
## [1] "MgO" "Ti" "Mn" "Fe" "Cu" "Zn" "Rb" "Sr" "Y" "Zr"
# PCA analysis
# Data were already z-scaled above, so the prcomp defaults are appropriate
pca_1 <- prcomp(pxrf_final[3:12])
summary(pca_1)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.3569 1.4536 0.9800 0.72263 0.38973 0.17504 1.58e-16
## Proportion of Variance 0.5952 0.2264 0.1029 0.05595 0.01627 0.00328 0.00e+00
## Cumulative Proportion 0.5952 0.8216 0.9245 0.98044 0.99672 1.00000 1.00e+00
# PCA plots
# Base-graphics biplot of PC1/PC2 (samples in grey, element loadings in pink)
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Byzantine elements")
# ggplot score plot, labelled with sample names and coloured by sample type
autoplot(pca_1, data=pxrf_final, colour='Type', shape = FALSE, label = TRUE, main = "PCA Byzantine grouped by sample type")
# FactoMineR PCA (also draws its default graphs and prints the result index below)
PCA(pxrf_final[3:12])
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 7 individuals, described by 10 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
# HCA dendrogram, samples color coded by type:
# NOTE(review): dist() is applied to the whole pxrf_final, including the character
# columns Area and Type — confirm these are meant to enter the distance matrix
dend2 <-
pxrf_final %>% # data
dist %>% # calculate a distance matrix
hclust(method = "ward.D2") %>% # hierarchical clustering
as.dendrogram %>% # turn the object into a dendrogram
color_branches(k=5)
# Colour the sample labels by sample type (column 2 of pxrf_final)
type <- as.factor(pxrf_final[, 2])
n_type <- length(unique(type))
cols_t <- colorspace::rainbow_hcl(n_type, c = 70, l = 50)
col_type <- cols_t[type]
# labels come back in dendrogram order, hence the reordering
labels_colors(dend2) <- col_type[order.dendrogram(dend2)]
plot(dend2, main="HCA with sample types")
rect.dendrogram(dend2, 5, border = "Black")
legend("topright", legend = levels(type), fill = cols_t)
# Libraries
library(dplyr);
library(openxlsx);
library(ggtern)
# Read and filter data
grain <- read.xlsx("data/grain_AYB.xlsx", sep=";")
# Drop the sheet's auto-generated 'Average' rows (we average ourselves below,
# across multiple runs of the same sample) and any test runs
grain <- grain %>%
  filter(!grepl('Average', Sample), !grepl('test', Sample))
# Average the runs per "Sample" / "Context" / "Type"
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)
# aggregate() duplicated the grouping columns as Group.1-3; drop the originals
grain <- grain %>% select(-c(Sample, Context, Type))
# Sample names become row names; give the remaining group columns their names back
rownames(grain) <- grain$Group.1
grain <- grain %>%
  select(-Group.1) %>%
  rename(Context = Group.2, Type = Group.3)
# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
# Min-max normalize a numeric vector to [0, 1].
#   x     numeric vector
#   na.rm forwarded to min()/max(); TRUE (default) ignores NAs in the range.
# Bug fix: na.rm was previously accepted but ignored (TRUE was hard-coded).
normalize <- function(x, na.rm = TRUE) {
  lo <- min(x, na.rm = na.rm)
  hi <- max(x, na.rm = na.rm)
  (x - lo) / (hi - lo)
}
# Apply function "normalize" to clay, silt and sand columns
# (apply() over columns returns a matrix; wrap it back into a data frame)
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))
# Ternary plots
# Labelled version coloured by context (points shrunk to 0 so only labels show)
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
labs(title="Clay silt sand") +
theme_rgbw() +
theme_nomask() +
geom_point(size=0) +
geom_text(aes(label=rownames(grain_01)), size=3)
# Point-only version, coloured by sample type
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Type)) +
labs(title="Clay silt sand") +
theme_nomask() +
geom_mask() +
geom_point(size=2)
We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.
# Libraries
library(openxlsx);
library(dplyr);
library(ggplot2);
library(ggrepel);
library(tidyr);
library(reshape2) # Melt-function for reshaping data to long form
# Read the manual loss-on-ignition table and the thermogravimeter table
loi <- read.xlsx("data/loi_AYB.xlsx", sep=";")
tga <- read.xlsx("data/tga_AYB.xlsx", sep=";")
# Quick overview of the raw manual LOI table
summary(loi)
## sample crucible wet_weight sample_wet
## Length:7 Min. :8.382 Min. :10.92 Min. :2.257
## Class :character 1st Qu.:8.555 1st Qu.:11.39 1st Qu.:2.613
## Mode :character Median :8.666 Median :11.50 Median :2.880
## Mean :8.943 Mean :11.68 Mean :2.739
## 3rd Qu.:9.442 3rd Qu.:12.13 3rd Qu.:2.905
## Max. :9.560 Max. :12.31 Max. :3.002
## dry_weight after_550_C after_950_C context
## Min. :10.83 Min. :10.62 Min. :10.50 Length:7
## 1st Qu.:11.33 1st Qu.:11.25 1st Qu.:11.04 Class :character
## Median :11.45 Median :11.39 Median :11.21 Mode :character
## Mean :11.63 Mean :11.55 Mean :11.24
## 3rd Qu.:12.11 3rd Qu.:12.05 3rd Qu.:11.43
## Max. :12.26 Max. :12.24 Max. :12.06
## type
## Length:7
## Class :character
## Mode :character
##
##
##
# Quick overview of the raw TGA table (note: the mass columns arrive as character)
summary(tga)
## Name Analysis;Date Batch Wet
## Length:7 Min. :44536 Length:7 Length:7
## Class :character 1st Qu.:44536 Class :character Class :character
## Mode :character Median :44536 Mode :character Mode :character
## Mean :44536
## 3rd Qu.:44536
## Max. :44536
## Dry Mass_550 Mass_950 context
## Length:7 Length:7 Length:7 Length:7
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## type
## Length:7
## Class :character
## Mode :character
##
##
##
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))
# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)
# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name
# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)
# LOI percentages: mass lost at 550 C relative to dry mass, and mass lost
# between 550 C and 950 C relative to the 550 C mass
loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100
# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100
# Colored boxplots in same graph
# Keep context plus the two LOI percentage columns, then reshape to long form
loi_long <- loi[c(8,13:14)]
loi_long <- melt(loi_long, id = "context") # Reshaping data to long form
ggplot(loi_long, aes(x = context, y = value, color = variable)) +
geom_boxplot() +
labs(title="Traditional LOI by context",
x ="Context", y = "LOI")
# Color by type
# Organic vs. carbonate scatter; shape = context, colour = type
ggplot(loi,
aes(c550, c950)) +
geom_point(size=2, aes(shape = factor(context), colour = factor(type))) +
labs(title = "Organic vs. carbonate content") +
xlab('Organic LOI (%)') +
ylab('Carbonate LOI (%)') +
theme(axis.title = element_text())
# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi, key = var, value = value, c550, c950)
ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
geom_bar(stat = 'identity', position = 'stack') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# Colored boxplots in same graph
# Keep context plus the two TGA LOI percentage columns, then reshape to long form
tga_long <- tga[c(8,10:11)]
tga_long <- melt(tga_long, id = "context") # Reshaping data to long form
ggplot(tga_long, aes(x = context, y = value, color = variable)) +
geom_boxplot() +
labs(title="Thermogravimeter LOI by context",
x ="Context", y = "LOI")
# Color by type biplot
ggplot(tga,
aes(c550, c950, label = rownames(tga), colour = factor(type))) +
geom_point(size=2) +
geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
xlab('Organic LOI (%)') +
ylab('Carbonate LOI (%)') +
theme(axis.title = element_text())
# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga, key = var, value = value, c550, c950)
ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
geom_bar(stat = 'identity', position = 'stack') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# Libraries
library(dplyr);
library(ggplot2);
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)
# Read colorimeter output (tab-separated, decimal comma)
color1 <- read.table("data/color_AYB.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
# Keep only the CIELAB columns (L, a, b)
color <- subset(color1[, 3:5])
summary(color)
## L a b
## Min. :34.56 Min. : 2.480 Min. : 6.98
## 1st Qu.:38.93 1st Qu.: 5.215 1st Qu.:12.73
## Median :45.30 Median : 5.540 Median :13.28
## Mean :46.69 Mean : 6.211 Mean :14.56
## 3rd Qu.:50.95 3rd Qu.: 6.675 3rd Qu.:16.21
## Max. :67.20 Max. :11.680 Max. :23.75
# a* against b*
ggplot(color, aes(a, b, label = rownames(color))) +
  geom_point(size = 2) +
  geom_text_repel(size = 3) +
  labs(title = "Color (a*b)", x = "a D65", y = "b D65")
# Luminance by sample number (X column of the raw table)
ggplot(color1, aes(X, L, label = rownames(color))) +
  geom_point(size = 2) +
  geom_text_repel(size = 3) +
  labs(title = "Luminance", x = "Sample number", y = "L* (D65)")
# a* against luminance
ggplot(color, aes(a, L, label = rownames(color))) +
  geom_point(size = 2) +
  geom_text_repel(size = 3) +
  labs(title = "A versus luminance", x = "a", y = "L* (D65)")
# b* against luminance
ggplot(color, aes(b, L, label = rownames(color))) +
  geom_point(size = 2) +
  geom_text_repel(size = 3) +
  labs(title = "B versus luminance", x = "b", y = "L* (D65)")
# Colorful 3D plot with drop lines
# a*/b*/L* as x/y/z, with vertical drop lines (type = "h") and sample labels
scatter3D(color$a, color$b, color$L,
xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$a, color$b, color$L, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)
# Same data with L* on the x axis for an alternative viewing angle
scatter3D(color$L, color$a, color$b,
xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$L, color$a, color$b, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)
Intro about the site
From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:
Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)
The “bad” elements were excluded from the pXRF results from the get-go. After averaging and scaling the remaining values we ended up with two analysis datasets: “pxrf_all”, which contains every element with at least one non-NA result, and “pxrf_final”, in which the dubious elements and the elements with too many NA results are also omitted.
# Libraries
library(openxlsx);
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(dendextend); # Dendrograms
# Read the raw data
pxrf <- read.xlsx("data/pxrf_israel.xlsx", sep=";")
# Drop measurement rows flagged as 'TEST' or 'ERROR' in the Sample column
pxrf <- pxrf %>%
  filter(!grepl('TEST', Sample)) %>%
  filter(!grepl('ERROR', Sample))
# Drop the instrument metadata columns that play no part in the analysis
pxrf <- pxrf %>%
  select(-c(Site, `Other;name`, `File;#`, DateTime,
            Application, Method, ElapsedTime, `Cal;Check`))
# Coerce "< LOD" readings to NA by converting character columns to numeric
pxrf[4:93] <- pxrf[4:93] %>% mutate_if(is.character, as.numeric)
# Averaging results by "Sample", "Area" and "Type" columns
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)
# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)
# Assigning sample names as new row names
rownames(pxrf_averaged) <- pxrf_averaged$Group.1
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)
# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"
# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))
# Scaling the data with standard z-score method
pxrf_scaled[3:47] <- scale(pxrf_scaled[3:47])
# Removing columns that only contain NA values.
# The predicate keeps a column when it has at least one non-NA value.
# Fix: the previous name "all_na" described the opposite of what the
# function tests; renamed, with a backward-compatible alias retained.
has_any_value <- function(x) any(!is.na(x))
all_na <- has_any_value # alias for the old (misleading) name, kept for compatibility
pxrf_all <- pxrf_scaled %>% select_if(has_any_value)
pxrf_averaged: All averaged data including averaged errors (Uine’s note: for later use)
# All averaged columns, including the averaged ";Err" measurement-error columns
colnames(pxrf_averaged)
## [1] "Area" "Type" "MgO" "MgO;Err" "Al2O3" "Al2O3;Err"
## [7] "SiO2" "SiO2;Err" "P2O5" "P2O5;Err" "S" "S;Err"
## [13] "Cl" "Cl;Err" "K2O" "K2O;Err" "CaO" "CaO;Err"
## [19] "Ti" "Ti;Err" "V" "V;Err" "Cr" "Cr;Err"
## [25] "Mn" "Mn;Err" "Fe" "Fe;Err" "Co" "Co;Err"
## [31] "Ni" "Ni;Err" "Cu" "Cu;Err" "Zn" "Zn;Err"
## [37] "As" "As;Err" "Se" "Se;Err" "Rb" "Rb;Err"
## [43] "Sr" "Sr;Err" "Y" "Y;Err" "Zr" "Zr;Err"
## [49] "Nb" "Nb;Err" "Mo" "Mo;Err" "Rh" "Rh;Err"
## [55] "Pd" "Pd;Err" "Ag" "Ag;Err" "Cd" "Cd;Err"
## [61] "Sn" "Sn;Err" "Sb" "Sb;Err" "Ba" "Ba;Err"
## [67] "La" "La;Err" "Ce" "Ce;Err" "Hf" "Hf;Err"
## [73] "Ta" "Ta;Err" "W" "W;Err" "Pt" "Pt;Err"
## [79] "Au" "Au;Err" "Hg" "Hg;Err" "Tl" "Tl;Err"
## [85] "Pb" "Pb;Err" "Bi" "Bi;Err" "Th" "Th;Err"
## [91] "U" "U;Err"
pxrf_all: All elements (the numbers below are the amount of NA values)
# Number of NA (below detection limit) values per remaining column
colSums(is.na(pxrf_all))
## Area Type MgO Al2O3 SiO2 P2O5 S Cl K2O CaO Ti Mn Fe
## 0 0 0 0 1 0 3 0 8 0 0 0 0
## Ni Cu Zn Rb Sr Y Zr Nb
## 11 0 0 6 0 16 0 17
pxrf_final: Final analysis data set with only the selected elements
# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>%
select(-Al2O3) %>%
select(-CaO) %>%
select(-SiO2) %>%
select(-Cl) %>%
select(-K2O) %>%
select(-Ni) %>%
select(-P2O5) %>%
select(-S) %>%
select(-Nb)
# Transforming remaining NA values to zeros for analysis
# NOTE(review): the data was z-scored above, so 0 equals the column mean --
# confirm this imputation is intended before interpreting distances/PCA.
pxrf_final[is.na(pxrf_final)] <- 0
# Final analysis data set
colnames(pxrf_final)
## [1] "Area" "Type" "MgO" "Ti" "Mn" "Fe" "Cu" "Zn" "Rb" "Sr"
## [11] "Y" "Zr"
pxrf_MB: Final analysis data set for MUDBRICK samples only (= no floor or lime plaster)
# Drop the first seven rows (the floor and lime plaster samples)
# to keep mudbrick samples only
pxrf_MB <- pxrf_final[-c(1:7), ]
rownames(pxrf_MB)
## [1] "AH-4" "AH-5" "RLZ-1" "TI-1" "TI-10" "TI-2" "TI-3" "TI-4" "TI-5"
## [10] "TI-6" "TI-7" "TI-8"
# PC autoplot with K-means clusters
# set.seed() makes the k-means cluster assignment reproducible
set.seed(1)
autoplot(kmeans(pxrf_final[3:12], 7), data = pxrf_final, label = TRUE, label.size = 3)
PCA with final elements:
# Elements
colnames(pxrf_final[3:12])
## [1] "MgO" "Ti" "Mn" "Fe" "Cu" "Zn" "Rb" "Sr" "Y" "Zr"
# PCA analysis
# (the data was already z-scored above, so prcomp() is run without rescaling)
pca_1 <- prcomp(pxrf_final[3:12])
summary(pca_1)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.0850 1.3922 1.0863 0.70334 0.64347 0.44563 0.31268
## Proportion of Variance 0.4953 0.2208 0.1344 0.05636 0.04717 0.02262 0.01114
## Cumulative Proportion 0.4953 0.7161 0.8505 0.90687 0.95404 0.97666 0.98780
## PC8 PC9 PC10
## Standard deviation 0.24332 0.18520 0.11656
## Proportion of Variance 0.00674 0.00391 0.00155
## Cumulative Proportion 0.99454 0.99845 1.00000
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Israel elements")
autoplot(pca_1, data=pxrf_final, colour='Area', shape = FALSE, label = TRUE, main = "PCA Israel grouped by area")
# FactoMineR's PCA (prints the result-object overview shown below)
PCA(pxrf_final[3:12])
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 19 individuals, described by 10 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
PCA with only mudbrick samples:
# Elements
colnames(pxrf_MB[3:12])
## [1] "MgO" "Ti" "Mn" "Fe" "Cu" "Zn" "Rb" "Sr" "Y" "Zr"
# PCA analysis (mudbrick samples only, see pxrf_MB above)
pca_2 <- prcomp(pxrf_MB[3:12])
summary(pca_2)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.3606 1.6226 0.7143 0.52086 0.42466 0.34190 0.21405
## Proportion of Variance 0.5941 0.2807 0.0544 0.02892 0.01923 0.01246 0.00488
## Cumulative Proportion 0.5941 0.8748 0.9292 0.95809 0.97732 0.98978 0.99467
## PC8 PC9 PC10
## Standard deviation 0.18778 0.10943 0.05281
## Proportion of Variance 0.00376 0.00128 0.00030
## Cumulative Proportion 0.99843 0.99970 1.00000
# PCA plots
biplot(pca_2, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Israel elements")
autoplot(pca_2, data=pxrf_MB, colour='Area', shape = FALSE, label = TRUE, main = "PCA Israel grouped by area")
# FactoMineR's PCA (prints the result-object overview shown below)
PCA(pxrf_MB[3:12])
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 12 individuals, described by 10 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
# HCA dendrogram, samples color coded by area:
# Ward (D2) clustering on Euclidean distances, cut into 4 colored branches.
# NOTE(review): pxrf_MB still contains the character "Area"/"Type" columns
# when fed to dist() -- confirm they should be included in the distances.
hc_tree <- hclust(dist(pxrf_MB), method = "ward.D2")
dend <- color_branches(as.dendrogram(hc_tree), k=4)
# One color per excavation area; leaf label colors are reordered to
# match the dendrogram's leaf order.
area <- as.factor(pxrf_MB[, 1])
area_palette <- colorspace::rainbow_hcl(length(unique(area)), c = 70, l = 50)
leaf_colors <- area_palette[area]
labels_colors(dend) <- leaf_colors[order.dendrogram(dend)]
plot(dend, main="HCA with sample areas")
rect.dendrogram(dend, 4, border = "Black")
legend("topright", legend = levels(area), fill = area_palette)
# Libraries
library(dplyr);
library(openxlsx);
library(ggtern)
# Read and filter data
grain <- read.xlsx("data/grain_israel.xlsx", sep=";")
# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>%
filter(!grepl('Average', Sample)) %>%
filter(!grepl('test', Sample))
# Average data by "Sample", "Context" and "Type" columns from the original dataset
# (mean() over the character grouping columns yields NA columns, dropped below)
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)
# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)
# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
grain <- grain %>% select(-Group.1)
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"
# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
# Min-max rescale a numeric vector to [0, 1].
# Fix: the na.rm argument was previously accepted but ignored (TRUE was
# hard-coded in the body); it is now honored. The default (TRUE) preserves
# the original behavior for all existing callers.
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=na.rm))/(max(x, na.rm=na.rm)-min(x, na.rm=na.rm))}
# Apply function "normalize" to clay, silt and sand columns
# (columns 4:6 are assumed to be Clay/Silt/Sand -- the aes() calls below
# rely on those names; apply() coerces the data frame to a matrix first)
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))
# Ternary plots
# Labeled version: point size 0 so only the sample labels are visible
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
labs(title="Clay silt sand") +
theme_rgbw() +
theme_nomask() +
geom_point(size=0) +
geom_text(aes(label=rownames(grain_01)), size=3)
# Point-only version of the same ternary plot
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
labs(title="Clay silt sand") +
theme_nomask() +
geom_mask() +
geom_point(size=2)
We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.
# Libraries
library(openxlsx);
library(dplyr);
library(ggplot2);
library(ggrepel);
library(tidyr);
library(reshape2) # Melt-function for reshaping data to long form
# Read the manual LOI and the thermogravimeter (TGA) measurements
loi <- read.xlsx("data/loi_israel.xlsx", sep=";")
tga <- read.xlsx("data/tga_israel.xlsx", sep=";")
summary(loi)
## sample crucible wet_weight sample_wet
## Length:19 Min. :7.921 Min. :10.55 Min. :2.081
## Class :character 1st Qu.:8.557 1st Qu.:11.23 1st Qu.:2.403
## Mode :character Median :9.278 Median :11.78 Median :2.584
## Mean :9.017 Mean :11.63 Mean :2.611
## 3rd Qu.:9.452 3rd Qu.:11.92 3rd Qu.:2.828
## Max. :9.787 Max. :12.62 Max. :3.231
## dry_weight after_550_C after_950_C context
## Min. :10.52 Min. :10.42 Min. : 9.773 Length:19
## 1st Qu.:11.23 1st Qu.:11.18 1st Qu.:10.376 Class :character
## Median :11.76 Median :11.70 Median :10.971 Mode :character
## Mean :11.60 Mean :11.53 Mean :10.884
## 3rd Qu.:11.88 3rd Qu.:11.78 3rd Qu.:11.322
## Max. :12.56 Max. :12.49 Max. :12.371
## type
## Length:19
## Class :character
## Mode :character
##
##
##
summary(tga)
## Name Analysis;Date Batch Wet
## Length:12 Length:12 Length:12 Length:12
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## Dry Mass_550 Mass_950 context
## Length:12 Length:12 Length:12 Length:12
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## type
## Length:12
## Class :character
## Mode :character
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))
# TGA character columns to numeric
# (columns 4:7 = Wet, Dry, Mass_550, Mass_950, which were read in as text above)
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)
# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name
# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)
# Percent mass loss of each ignition step, relative to the preceding step
loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100
# Same with TGA (instrument reports sample masses directly, no crucible to subtract)
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100
NOTE! Remember to remove Ah-Halom floor and plaster samples from boxplots!
# Colored boxplots in same graph
# (columns 8, 13:14 = context, c550, c950; see the column order from summary(loi))
loi_long <- loi[c(8,13:14)]
loi_long <- melt(loi_long, id = "context") # Reshaping data to long form
ggplot(loi_long, aes(x = variable, y = value, color = context)) +
geom_boxplot() +
labs(title="Traditional LOI by temperature",
x ="Temperature", y = "LOI")
ggplot(loi_long, aes(x = context, y = value, color = variable)) +
geom_boxplot() +
labs(title="Traditional LOI by context",
x ="Context", y = "LOI")
# Color by context
ggplot(loi,
aes(c550, c950, label = rownames(loi), colour = factor(context))) +
geom_point(size=2, aes(shape = factor(type))) +
geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
xlab('Organic LOI (%)') +
ylab('Carbonate LOI (%)') +
theme(axis.title = element_text())
# Color by type
ggplot(loi,
aes(c550, c950)) +
geom_point(size=2, aes(shape = factor(context), colour = factor(type))) +
labs(title = "Organic vs. carbonate content") +
xlab('Organic LOI (%)') +
ylab('Carbonate LOI (%)') +
theme(axis.title = element_text())
# Stacked barplot (x = reorder(sample, LOI)): both LOI steps stacked per
# sample, samples ordered by LOI, x labels rotated 90 degrees.
# Fixes: `<-` instead of `=` for assignment, tidyr::pivot_longer() instead of
# the superseded gather(), and geom_col() for geom_bar(stat = 'identity').
loi_long <- pivot_longer(loi, cols = c(c550, c950), names_to = "var", values_to = "value")
ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
geom_col(position = 'stack') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# Colored boxplots in same graph
# (columns 8, 10:11 = context, c550, c950; see the column order from summary(tga))
tga_long <- tga[c(8,10:11)]
tga_long <- melt(tga_long, id = "context") # Reshaping data to long form
ggplot(tga_long, aes(x = variable, y = value, color = context)) +
geom_boxplot() +
labs(title="Thermogravimeter LOI by temperature",
x ="Temperature", y = "LOI")
ggplot(tga_long, aes(x = context, y = value, color = variable)) +
geom_boxplot() +
labs(title="Thermogravimeter LOI by context",
x ="Context", y = "LOI")
# Color by context biplot
ggplot(tga,
aes(c550, c950, label = rownames(tga), colour = factor(context))) +
geom_point(size=2) +
geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
xlab('Organic LOI (%)') +
ylab('Carbonate LOI (%)') +
theme(axis.title = element_text())
# Stacked barplot (x = reorder(sample, LOI)): both TGA LOI steps stacked per
# sample, samples ordered by LOI, x labels rotated 90 degrees.
# Fixes: `<-` instead of `=` for assignment, tidyr::pivot_longer() instead of
# the superseded gather(), and geom_col() for geom_bar(stat = 'identity').
tga_long <- pivot_longer(tga, cols = c(c550, c950), names_to = "var", values_to = "value")
ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
geom_col(position = 'stack') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# Libraries
library(dplyr);
library(ggplot2);
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)
# Read the colorimeter export; tab-separated with decimal commas (dec = ",")
color1 <- read.table("data/color_israel.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
# Keep only the L, a, b columns (3:5); row names carry over from color1,
# so rownames(color) below labels the same samples as color1
color <- subset(color1[, 3:5])
summary(color)
## L a b
## Min. :43.75 Min. : 3.160 Min. :13.85
## 1st Qu.:56.08 1st Qu.: 3.770 1st Qu.:14.71
## Median :64.30 Median : 4.350 Median :15.85
## Mean :63.19 Mean : 6.649 Mean :17.27
## 3rd Qu.:71.51 3rd Qu.: 6.280 3rd Qu.:17.70
## Max. :75.34 Max. :19.530 Max. :25.84
# a against b, labeled with sample names
ggplot(color,
aes(a, b, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Color (a*b)") +
xlab("a D65") +
ylab("b D65")
# Luminance by sample number (X column of the raw export)
ggplot(color1,
aes(X, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Luminance") +
ylab("L* (D65)") +
xlab("Sample number")
# a channel against luminance
ggplot(color,
aes(a, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "A versus luminance") +
ylab("L* (D65)") +
xlab("a")
# b channel against luminance
ggplot(color,
aes(b, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "B versus luminance") +
ylab("L* (D65)") +
xlab("b")
# Colorful 3D plot with drop lines
# (type = "h" draws vertical drop lines; text3D(add = TRUE) overlays labels)
scatter3D(color$a, color$b, color$L,
xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$a, color$b, color$L, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)
# Same data with the axes permuted (L* first) for an alternative view
scatter3D(color$L, color$a, color$b,
xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$L, color$a, color$b, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)
Intro about the site
From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:
Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)
The “bad” elements were excluded from the pXRF results from the get-go. After averaging and scaling the remaining values we ended up with two analysis data sets: “pxrf_all”, which contains every element with at least one non-NA result, and “pxrf_final”, from which the dubious elements and the elements with too many NA results have also been removed.
# Libraries
library(openxlsx);
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(dendextend); # Dendrograms
# Read the raw data
# NOTE(review): `sep` partially matches read.xlsx()'s `sep.names` argument, which
# is why multi-word column names appear ";"-separated below -- confirm intended.
pxrf <- read.xlsx("data/pxrf_PP.xlsx", sep=";")
# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))
# Remove unused columns
pxrf <- pxrf %>%
select(-Site) %>%
select(-'Other;name') %>%
select(-'File;#') %>%
select(-DateTime) %>%
select(-Application) %>%
select(-Method) %>%
select(-ElapsedTime) %>%
select(-'Cal;Check')
# Coercing "< LOD" -results to NA:s
# (as.numeric() maps non-numeric strings such as "< LOD" to NA;
# columns 4:93 are assumed to be the measurement columns -- TODO confirm)
pxrf[4:93] <- pxrf[4:93] %>% mutate_if(is.character,as.numeric)
# Averaging results by "Sample", "Area" and "Type" columns
# (mean() over the character grouping columns yields NA-filled columns,
# which are dropped right below)
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)
# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)
# Assigning sample names as new row names
rownames(pxrf_averaged) <- pxrf_averaged$Group.1
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)
# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"
# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))
# Scaling the data with standard z-score method
# (columns 3:47 = the element columns left after dropping the ";Err" columns
# -- TODO confirm indices if the input layout changes)
pxrf_scaled[3:47] <- scale(pxrf_scaled[3:47])
# Removing columns that only contain NA values.
# The predicate keeps a column when it has at least one non-NA value.
# Fix: the previous name "all_na" described the opposite of what the
# function tests; renamed, with a backward-compatible alias retained.
has_any_value <- function(x) any(!is.na(x))
all_na <- has_any_value # alias for the old (misleading) name, kept for compatibility
pxrf_all <- pxrf_scaled %>% select_if(has_any_value)
pxrf_averaged: All averaged data including averaged errors (Uine’s note: for later use)
# All averaged columns, including the averaged ";Err" measurement-error columns
colnames(pxrf_averaged)
## [1] "Area" "Type" "MgO" "MgO;Err" "Al2O3" "Al2O3;Err"
## [7] "SiO2" "SiO2;Err" "P2O5" "P2O5;Err" "S" "S;Err"
## [13] "Cl" "Cl;Err" "K2O" "K2O;Err" "CaO" "CaO;Err"
## [19] "Ti" "Ti;Err" "V" "V;Err" "Cr" "Cr;Err"
## [25] "Mn" "Mn;Err" "Fe" "Fe;Err" "Co" "Co;Err"
## [31] "Ni" "Ni;Err" "Cu" "Cu;Err" "Zn" "Zn;Err"
## [37] "As" "As;Err" "Se" "Se;Err" "Rb" "Rb;Err"
## [43] "Sr" "Sr;Err" "Y" "Y;Err" "Zr" "Zr;Err"
## [49] "Nb" "Nb;Err" "Mo" "Mo;Err" "Rh" "Rh;Err"
## [55] "Pd" "Pd;Err" "Ag" "Ag;Err" "Cd" "Cd;Err"
## [61] "Sn" "Sn;Err" "Sb" "Sb;Err" "Ba" "Ba;Err"
## [67] "La" "La;Err" "Ce" "Ce;Err" "Hf" "Hf;Err"
## [73] "Ta" "Ta;Err" "W" "W;Err" "Pt" "Pt;Err"
## [79] "Au" "Au;Err" "Hg" "Hg;Err" "Tl" "Tl;Err"
## [85] "Pb" "Pb;Err" "Bi" "Bi;Err" "Th" "Th;Err"
## [91] "U" "U;Err"
pxrf_all: All elements (the numbers below are the amount of NA values)
# Number of NA (below detection limit) values per remaining column
colSums(is.na(pxrf_all))
## Area Type MgO Al2O3 SiO2 P2O5 S Cl K2O CaO Ti V Mn
## 0 0 34 34 34 35 35 34 36 34 0 41 0
## Fe Ni Cu Zn As Rb Sr Y Zr Rh Ag
## 0 0 0 34 48 35 34 36 0 51 49
pxrf_final: Final analysis data set with only the selected elements, all samples (soil and published included)
# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>%
select(-Al2O3) %>%
select(-CaO) %>%
select(-SiO2) %>%
select(-Cl) %>%
select(-K2O) %>%
select(-Ni) %>%
select(-P2O5) %>%
select(-S) %>%
select(-V) %>%
select(-As) %>%
select(-Rh) %>%
select(-Ag)
# Transforming remaining NA values to zeros for analysis
# NOTE(review): the data was z-scored above, so 0 equals the column mean --
# confirm this imputation is intended before interpreting distances/PCA.
pxrf_final[is.na(pxrf_final)] <- 0
# Final analysis data set
colnames(pxrf_final)
## [1] "Area" "Type" "MgO" "Ti" "Mn" "Fe" "Cu" "Zn" "Rb" "Sr"
## [11] "Y" "Zr"
pxrf_MB: Final analysis data set for NEW MUDBRICK samples only (= no soil or published samples)
# Keep the first 11 rows = the new mudbrick samples (PP-9 .. PP-19)
pxrf_MB <- pxrf_final[c(1:11), ]
rownames(pxrf_MB)
## [1] "PP-10" "PP-11" "PP-12" "PP-13" "PP-9" "PP-19" "PP-14" "PP-15" "PP-16"
## [10] "PP-17" "PP-18"
pxrf_soil: Final analysis data set for NEW samples only (= no published samples, yes soil)
# Drop rows 12:45 (the previously published samples), keeping the new
# mudbrick samples plus the soil samples (PP-1 .. PP-8)
pxrf_soil <- pxrf_final[-c(12:45), ]
rownames(pxrf_soil)
## [1] "PP-10" "PP-11" "PP-12" "PP-13" "PP-9" "PP-19" "PP-14" "PP-15" "PP-16"
## [10] "PP-17" "PP-18" "PP-1" "PP-2" "PP-3" "PP-6" "PP-4" "PP-7" "PP-5"
## [19] "PP-8"
pxrf_MB2: Final analysis data set for ALL MUDBRICK samples (= no soil)
# Keep rows 1:45 = new mudbrick samples plus the previously published
# samples (numeric row names), excluding the soil samples
pxrf_MB2 <- pxrf_final[c(1:45), ]
rownames(pxrf_MB2)
## [1] "PP-10" "PP-11" "PP-12" "PP-13" "PP-9" "PP-19" "PP-14" "PP-15" "PP-16"
## [10] "PP-17" "PP-18" "1" "10" "11" "12" "13" "14" "15"
## [19] "16" "17" "18" "19" "2" "20" "21" "22" "23"
## [28] "24" "25" "26" "27" "28" "29" "3" "30" "31"
## [37] "32" "33" "34" "4" "5" "6" "7" "8" "9"
Only the “new” mudbrick samples
# PC autoplot with K-means clusters
# set.seed() makes the k-means cluster assignment reproducible
set.seed(1)
autoplot(kmeans(pxrf_MB[3:12], 3), data = pxrf_MB, label = TRUE, label.size = 3)
PCA with only new mudbrick samples:
# Elements
colnames(pxrf_MB[3:12])
## [1] "MgO" "Ti" "Mn" "Fe" "Cu" "Zn" "Rb" "Sr" "Y" "Zr"
# PCA analysis (new mudbrick samples only)
pca_1 <- prcomp(pxrf_MB[3:12])
summary(pca_1)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.5967 1.0886 0.99328 0.85704 0.40479 0.33892 0.20666
## Proportion of Variance 0.6735 0.1184 0.09854 0.07336 0.01637 0.01147 0.00427
## Cumulative Proportion 0.6735 0.7918 0.89037 0.96373 0.98010 0.99157 0.99584
## PC8 PC9 PC10
## Standard deviation 0.17917 0.09521 0.02255
## Proportion of Variance 0.00321 0.00091 0.00005
## Cumulative Proportion 0.99904 0.99995 1.00000
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Palaepaphos elements")
autoplot(pca_1, data=pxrf_MB, colour='Area', shape = FALSE, label = TRUE, main = "PCA Palaepaphos grouped by area")
# FactoMineR's PCA (prints the result-object overview shown below)
PCA(pxrf_MB[3:12])
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 11 individuals, described by 10 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
PCA with soil samples:
# Elements
colnames(pxrf_soil[3:12])
## [1] "MgO" "Ti" "Mn" "Fe" "Cu" "Zn" "Rb" "Sr" "Y" "Zr"
# PCA analysis (new mudbrick + soil samples)
pca_2 <- prcomp(pxrf_soil[3:12])
summary(pca_2)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.0533 1.6050 1.0187 0.82293 0.69376 0.39645 0.33871
## Proportion of Variance 0.4498 0.2748 0.1107 0.07225 0.05135 0.01677 0.01224
## Cumulative Proportion 0.4498 0.7247 0.8354 0.90763 0.95898 0.97575 0.98799
## PC8 PC9 PC10
## Standard deviation 0.24806 0.21529 0.0685
## Proportion of Variance 0.00657 0.00495 0.0005
## Cumulative Proportion 0.99455 0.99950 1.0000
# PCA plots (grouped both by area and by sample type, since this set mixes
# mudbrick and soil samples)
biplot(pca_2, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Palaepaphos elements")
autoplot(pca_2, data=pxrf_soil, colour='Area', shape = FALSE, label = TRUE, main = "PCA Palaepaphos grouped by area")
autoplot(pca_2, data=pxrf_soil, colour='Type', shape = FALSE, label = TRUE, main = "PCA Palaepaphos grouped by sample type")
# FactoMineR's PCA (prints the result-object overview shown below)
PCA(pxrf_soil[3:12])
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 19 individuals, described by 10 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
Including the previously published ones, no soil
# Elements
colnames(pxrf_MB2[3:12])
## [1] "MgO" "Ti" "Mn" "Fe" "Cu" "Zn" "Rb" "Sr" "Y" "Zr"
# PCA analysis (new + previously published mudbrick samples, no soil)
pca_3 <- prcomp(pxrf_MB2[3:12])
summary(pca_3)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 1.8473 1.1712 0.76106 0.60306 0.44231 0.40557 0.30364
## Proportion of Variance 0.5429 0.2182 0.09215 0.05786 0.03112 0.02617 0.01467
## Cumulative Proportion 0.5429 0.7611 0.85326 0.91112 0.94224 0.96841 0.98307
## PC8 PC9 PC10
## Standard deviation 0.22882 0.19928 0.11965
## Proportion of Variance 0.00833 0.00632 0.00228
## Cumulative Proportion 0.99140 0.99772 1.00000
# PCA plots
biplot(pca_3, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Palaepaphos elements")
autoplot(pca_3, data=pxrf_MB2, colour='Area', shape = FALSE, label = TRUE, main = "PCA Palaepaphos grouped by area")
# FactoMineR's PCA (prints the result-object overview shown below)
PCA(pxrf_MB2[3:12])
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 45 individuals, described by 10 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
HCA including only new samples:
# HCA dendrogram, samples color coded by area:
# Ward (D2) clustering of the new mudbrick samples.
# NOTE(review): branches are colored with k=6 here but the rectangles below
# are drawn for k=4 clusters -- confirm the mismatch is intended.
dend <-
pxrf_MB %>% # data
dist %>% # calculate a distance matrix
hclust(method = "ward.D2") %>% # hierarchical clustering
as.dendrogram %>% # turn the object into a dendrogram
color_branches(k=6)
# One color per excavation area (column 1); leaf label colors are
# reordered to match the dendrogram's leaf order
area <- as.factor(pxrf_MB[, 1])
n_area <- length(unique(area))
cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l = 50)
col_area <- cols_a[area]
labels_colors(dend) <- col_area[order.dendrogram(dend)]
plot(dend, main="HCA with sample areas")
rect.dendrogram(dend, 4, border = "Black")
legend("topright", legend = levels(area), fill = cols_a)
# HCA dendrogram, samples color coded by type:
# Same procedure on the mudbrick + soil set, labels colored by
# sample type (column 2) instead of area.
dend2 <-
pxrf_soil %>% # data
dist %>% # calculate a distance matrix
hclust(method = "ward.D2") %>% # hierarchical clustering
as.dendrogram %>% # turn the object into a dendrogram
color_branches(k=6)
type <- as.factor(pxrf_soil[, 2])
n_type <- length(unique(type))
cols_t <- colorspace::rainbow_hcl(n_type, c = 70, l = 50)
col_type <- cols_t[type]
labels_colors(dend2) <- col_type[order.dendrogram(dend2)]
plot(dend2, main="HCA with new samples by type")
rect.dendrogram(dend2, 6, border = "Black")
legend("topright", legend = levels(type), fill = cols_t)
HCA including all mudbrick samples:
# HCA dendrogram, samples color coded by area:
# All mudbrick samples this time; Ward (ward.D2) clustering on Euclidean
# distances, branches colored into k = 4 clusters (matching the rectangles).
dend3 <-
pxrf_MB2 %>% # data
dist %>% # calculate a distance matrix
hclust(method = "ward.D2") %>% # hierarchical clustering
as.dendrogram %>% # turn the object into a dendrogram
color_branches(k=4)
# One HCL rainbow color per area (column 1 of pxrf_MB2)
area <- as.factor(pxrf_MB2[, 1])
n_area <- length(unique(area))
cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l = 50)
col_area <- cols_a[area]
# Label colors must follow the leaf order of the dendrogram
labels_colors(dend3) <- col_area[order.dendrogram(dend3)]
plot(dend3, main="HCA with sample areas")
rect.dendrogram(dend3, 4, border = "Black")
legend("topright", legend = levels(area), fill = cols_a)
# Libraries
library(dplyr);
library(openxlsx);
library(ggtern)
# Read and filter data
# NOTE(review): openxlsx::read.xlsx has no `sep` argument — presumably
# `sep.names = ";"` was intended (column names in this file contain ";").
# Verify the argument is not silently ignored.
grain <- read.xlsx("data/grain_PP.xlsx", sep=";")
# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>%
filter(!grepl('Average', Sample)) %>%
filter(!grepl('test', Sample))
# Average data by "Sample", "Context" and "Type" columns from the original dataset
# aggregate() stores the grouping keys in new columns Group.1/Group.2/Group.3
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)
# Remove the now empty columns for clarity
# (the original character columns were destroyed by FUN = mean)
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)
# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
grain <- grain %>% select(-Group.1)
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"
# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
# Min-max rescale a numeric vector to [0, 1].
#   x     - numeric vector
#   na.rm - forwarded to min()/max(); the default TRUE reproduces the old
#           behavior. FIX: the original body hardcoded na.rm=TRUE and
#           silently ignored this argument; it is now honored.
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=na.rm))/(max(x, na.rm=na.rm)-min(x, na.rm=na.rm))}
# Apply function "normalize" to clay, silt and sand columns
# NOTE(review): columns 4:6 are assumed to be Clay/Silt/Sand — verify.
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))
# Creating subsets for only mudbricks
# NOTE(review): assumes the first 11 rows are exactly the mudbrick samples —
# this depends on row order after aggregate(); confirm.
MB_grain <- grain_01[c(1:11),]
MB_context <- grain[c(1:11),]
# Ternary plots
# Mudbricks only, labeled by sample name and colored by context
ggtern(data=MB_grain, aes(x=Clay, y=Sand, z=Silt, color=MB_context$Context)) +
labs(title="Clay silt sand") +
theme_rgbw() +
theme_nomask() +
geom_point(size=0) +
geom_text(aes(label=rownames(MB_grain)), size=3)
# All samples (mudbricks and soil), points only
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
labs(title="Clay silt sand") +
theme_nomask() +
geom_mask() +
geom_point(size=2)
We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.
# Libraries
library(openxlsx);
library(dplyr);
library(ggplot2);
library(ggrepel);
library(tidyr);
library(reshape2) # Melt-function for reshaping data to long form
# Read the manual loss-on-ignition weighings and the TGA measurements.
# NOTE(review): openxlsx::read.xlsx has no `sep` argument — see the pXRF
# read above; verify it is not silently ignored.
loi <- read.xlsx("data/loi_PP.xlsx", sep=";")
tga <- read.xlsx("data/tga_PP.xlsx", sep=";")
# Quick structural check of both tables (note: all TGA mass columns arrive
# as character and are converted to numeric further below)
summary(loi)
## sample crucible wet;weight sample;wet
## Length:27 Min. :8.100 Min. : 9.637 Min. :1.537
## Class :character 1st Qu.:8.639 1st Qu.:10.859 1st Qu.:2.193
## Mode :character Median :8.997 Median :11.184 Median :2.371
## Mean :8.988 Mean :11.274 Mean :2.286
## 3rd Qu.:9.454 3rd Qu.:11.844 3rd Qu.:2.453
## Max. :9.560 Max. :12.301 Max. :2.785
## dry_weight after_550_C after_950_C context
## Min. : 9.595 Min. : 9.484 Min. : 9.23 Length:27
## 1st Qu.:10.813 1st Qu.:10.721 1st Qu.:10.27 Class :character
## Median :11.132 Median :11.019 Median :10.59 Mode :character
## Mean :11.216 Mean :11.113 Mean :10.67
## 3rd Qu.:11.777 3rd Qu.:11.681 3rd Qu.:11.18
## Max. :12.218 Max. :12.076 Max. :11.56
## type
## Length:27
## Class :character
## Mode :character
##
##
##
summary(tga)
## Name Analysis;Date Batch Wet
## Length:30 Length:30 Length:30 Length:30
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## Dry Mass_550 Mass_950 context
## Length:30 Length:30 Length:30 Length:30
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## type
## Length:30
## Class :character
## Mode :character
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))
# TGA character columns to numeric
# NOTE(review): columns 4:7 are assumed to be Wet/Dry/Mass_550/Mass_950 — verify.
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)
# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name
# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)
# LOI percentages: c550 = organic fraction relative to dry mass;
# c950 = carbonate fraction relative to the post-550 C mass
loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100
# Same with TGA
# (TGA reports sample masses directly, so no crucible subtraction is needed)
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100
# Removing duplicates and creating MB only subset
# NOTE(review): subset(x[i, ]) is a no-op wrapper around plain indexing;
# the row ranges assume a fixed row order (1:19 = unique runs, 9:19 = the
# mudbrick samples PP-9..PP-19) — verify against the source file.
loi2 <- subset(loi[1:19, ])
loi_MB <- subset(loi2[9:19, ])
rownames(loi_MB)
## [1] "PP-9" "PP-10" "PP-11" "PP-12" "PP-13" "PP-14" "PP-15" "PP-16" "PP-17"
## [10] "PP-18" "PP-19"
# Colored boxplots in same graph
# Keep context (col 8) and the two LOI percentages (cols 13:14 = c550, c950)
loi_MB_long <- loi_MB[c(8,13:14)]
loi_MB_long <- melt(loi_MB_long, id = "context") # Reshaping data to long form
# LOI distribution per temperature step, one box per context
ggplot(loi_MB_long, aes(x = variable, y = value, color = context)) +
geom_boxplot() +
labs(title="Traditional LOI by temperature (MB only)",
x ="Temperature", y = "LOI")
# Same data transposed: one box per temperature step, grouped by context
ggplot(loi_MB_long, aes(x = context, y = value, color = variable)) +
geom_boxplot() +
labs(title="Traditional LOI by context (MB only)",
x ="Context", y = "LOI")
# Colored boxplots in same graph: Soil samples included
loi_long <- loi[c(8,13:14)]
loi_long <- melt(loi_long, id = "context") # Reshaping data to long form
ggplot(loi_long, aes(x = variable, y = value, color = context)) +
geom_boxplot() +
labs(title="Traditional LOI by temperature (soil included)",
x ="Temperature", y = "LOI")
ggplot(loi_long, aes(x = context, y = value, color = variable)) +
geom_boxplot() +
labs(title="Traditional LOI by context (soil included)",
x ="Context", y = "LOI")
# MB only biplot
# Organic LOI (x) against carbonate LOI (y), labeled points colored by context
ggplot(loi_MB,
aes(c550, c950, label = rownames(loi_MB), colour = factor(context))) +
geom_point(size=2) +
geom_text_repel(size=3) + labs(title = "MB only organic vs. carbonate content") +
xlab('Organic LOI (%)') +
ylab('Carbonate LOI (%)') +
theme(axis.title = element_text()) +
# Color by context biplot
# All samples; point shape additionally encodes the sample type
ggplot(loi,
aes(c550, c950, label = rownames(loi), colour = factor(context))) +
geom_point(size=2, aes(shape = factor(type))) +
geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
xlab('Organic LOI (%)') +
ylab('Carbonate LOI (%)') +
theme(axis.title = element_text())
# Color by type biplot
# Same scatter without labels, colored by sample type only
ggplot(loi,
aes(c550, c950)) +
geom_point(size=2, aes(colour = factor(type))) +
labs(title = "Organic vs. carbonate content") +
xlab('Organic LOI (%)') +
ylab('Carbonate LOI (%)') +
theme(axis.title = element_text())
# Stacked barplot of the two LOI fractions per sample, samples ordered by LOI
loi_long <- gather(loi_MB, key = var, value = value, c550, c950)
ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
  geom_col(position = 'stack') +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# Creating MB only subset
# NOTE(review): rows 9:22 assumed to be the mudbrick runs — verify ordering.
tga_MB1 <- subset(tga[9:22, ])
# Colored boxplots in same graph
# Keep context (col 8) and the computed LOI columns (10:11 = c550, c950)
tga_MB_long <- tga_MB1[c(8,10:11)]
tga_MB_long <- melt(tga_MB_long, id = "context") # Reshaping data to long form
ggplot(tga_MB_long, aes(x = variable, y = value, color = context)) +
geom_boxplot() +
labs(title="Thermogravimeter LOI by temperature",
x ="Temperature", y = "LOI")
ggplot(tga_MB_long, aes(x = context, y = value, color = variable)) +
geom_boxplot() +
labs(title="Thermogravimeter LOI by context",
x ="Context", y = "LOI")
# Colored boxplots in same graph: soil samples included
tga_long <- tga[c(8,10:11)]
tga_long <- melt(tga_long, id = "context") # Reshaping data to long form
ggplot(tga_long, aes(x = variable, y = value, color = context)) +
geom_boxplot() +
labs(title="Thermogravimeter LOI by temperature",
x ="Temperature", y = "LOI")
ggplot(tga_long, aes(x = context, y = value, color = variable)) +
geom_boxplot() +
labs(title="Thermogravimeter LOI by context",
x ="Context", y = "LOI")
# Removing duplicates and creating MB only subset
# NOTE(review): rows 9:20 minus the 4th assumed to leave PP-9..PP-19 — the
# printed rownames below confirm it for this knit; verify if data changes.
tga_MB <- subset(tga[9:20, ])
tga_MB <- subset(tga_MB[-4, ])
rownames(tga_MB)
## [1] "PP-9" "PP-10" "PP-11" "PP-12" "PP-13" "PP-14" "PP-15" "PP-16" "PP-17"
## [10] "PP-18" "PP-19"
# Color by context biplot
# Organic LOI (x) vs carbonate LOI (y) from the TGA, colored by context
ggplot(tga_MB,
aes(c550, c950, label = rownames(tga_MB), colour = factor(context))) +
geom_point(size=2) +
geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
xlab('Organic LOI (%)') +
ylab('Carbonate LOI (%)') +
theme(axis.title = element_text())
# Stacked barplot of the two TGA LOI fractions per sample, ordered by LOI
tga_long <- gather(tga_MB, key = var, value = value, c550, c950)
ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
  geom_col(position = 'stack') +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# Libraries
library(dplyr);
library(ggplot2);
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)
# Colorimeter export: tab-separated, decimal comma
color1 <- read.table("data/color_PP.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
# Keep only the three measurement columns (L, a, b); the subset() wrapper
# is a no-op around plain indexing
color <- subset(color1[, 3:5])
summary(color)
## L a b
## Min. :62.38 Min. : 7.27 Min. :19.90
## 1st Qu.:63.47 1st Qu.: 8.50 1st Qu.:21.36
## Median :64.19 Median : 9.34 Median :21.72
## Mean :65.50 Mean : 9.29 Mean :21.92
## 3rd Qu.:65.78 3rd Qu.:10.56 3rd Qu.:22.96
## Max. :72.00 Max. :10.90 Max. :23.90
# a vs b scatter with repelled sample labels
ggplot(color,
aes(a, b, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Color") +
xlab("a D65") +
ylab("b D65")
# Luminance against sample number (column X of the raw export)
ggplot(color1,
aes(X, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Luminance") +
ylab("L* (D65)") +
xlab("Sample number")
# Luminance against a
ggplot(color,
aes(a, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Luminance") +
ylab("L* (D65)") +
xlab("a")
# Luminance against b
ggplot(color,
aes(b, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Luminance") +
ylab("L* (D65)") +
xlab("b")
# Colorful 3D plot with drop lines
# (type = "h" draws a drop line from each point to the base plane)
scatter3D(color$a, color$b, color$L,
xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$a, color$b, color$L, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)
# Same cloud with the axes rotated (L first)
scatter3D(color$L, color$a, color$b,
xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$L, color$a, color$b, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)
Intro about the site
From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:
Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)
The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.
# Libraries
library(openxlsx);
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(dendextend); # Dendrograms
# Read the raw data
# NOTE(review): openxlsx::read.xlsx has no `sep` argument — presumably
# `sep.names = ";"` was intended (column names contain ";"); verify.
pxrf <- read.xlsx("data/pxrf_AA.xlsx", sep=";")
# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))
# Remove unused columns
pxrf <- pxrf %>%
select(-Site) %>%
select(-'Other;name') %>%
select(-'File;#') %>%
select(-DateTime) %>%
select(-Application) %>%
select(-Method) %>%
select(-ElapsedTime) %>%
select(-'Cal;Check')
# Coercing "< LOD" -results to NA:s
# (as.numeric on character values like "< LOD" yields NA with a warning)
# NOTE(review): columns 4:93 assumed to be the measurement columns — verify.
pxrf[4:93] <- pxrf[4:93] %>% mutate_if(is.character,as.numeric)
# Averaging results by "Sample", "Area" and "Type" columns
# aggregate() stores the keys in new columns Group.1/Group.2/Group.3
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)
# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)
# Assigning sample names as new row names
rownames(pxrf_averaged) <- pxrf_averaged$Group.1
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)
# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"
# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))
# Scaling the data with standard z-score method
# NOTE(review): columns 3:47 assumed to be all element columns — verify.
pxrf_scaled[3:47] <- scale(pxrf_scaled[3:47])
# Removing columns that only contain NA values
# Predicate: TRUE when a column holds at least one non-NA value.
# FIX: the original name "all_na" was inverted — the function answers
# "is the column NOT all NA". Renamed; the old name is kept as an alias
# so any later chunk that still uses it keeps working.
has_any_value <- function(x) any(!is.na(x))
all_na <- has_any_value
pxrf_all <- pxrf_scaled %>% select_if(has_any_value)
pxrf_averaged: All averaged data including averaged errors (Uine’s note: for later use)
# Full column listing of the averaged table (elements plus their ";Err" pairs)
colnames(pxrf_averaged)
## [1] "Area" "Type" "MgO" "MgO;Err" "Al2O3" "Al2O3;Err"
## [7] "SiO2" "SiO2;Err" "P2O5" "P2O5;Err" "S" "S;Err"
## [13] "Cl" "Cl;Err" "K2O" "K2O;Err" "CaO" "CaO;Err"
## [19] "Ti" "Ti;Err" "V" "V;Err" "Cr" "Cr;Err"
## [25] "Mn" "Mn;Err" "Fe" "Fe;Err" "Co" "Co;Err"
## [31] "Ni" "Ni;Err" "Cu" "Cu;Err" "Zn" "Zn;Err"
## [37] "As" "As;Err" "Se" "Se;Err" "Rb" "Rb;Err"
## [43] "Sr" "Sr;Err" "Y" "Y;Err" "Zr" "Zr;Err"
## [49] "Nb" "Nb;Err" "Mo" "Mo;Err" "Rh" "Rh;Err"
## [55] "Pd" "Pd;Err" "Ag" "Ag;Err" "Cd" "Cd;Err"
## [61] "Sn" "Sn;Err" "Sb" "Sb;Err" "Ba" "Ba;Err"
## [67] "La" "La;Err" "Ce" "Ce;Err" "Hf" "Hf;Err"
## [73] "Ta" "Ta;Err" "W" "W;Err" "Pt" "Pt;Err"
## [79] "Au" "Au;Err" "Hg" "Hg;Err" "Tl" "Tl;Err"
## [85] "Pb" "Pb;Err" "Bi" "Bi;Err" "Th" "Th;Err"
## [91] "U" "U;Err"
pxrf_all: All elements (the numbers below are the amount of NA values)
# NA count per remaining column, used to decide which elements to drop below
colSums(is.na(pxrf_all))
## Area Type MgO Al2O3 SiO2 P2O5 S Cl K2O CaO Ti V Cr
## 0 0 0 0 0 1 0 0 0 0 0 5 8
## Mn Fe Ni Cu Zn As Rb Sr Y Zr Nb Ba
## 0 0 0 0 0 1 0 0 0 0 4 3
pxrf_final: Final analysis data set with only the selected elements
# Dropping the dubious elements and elements with too many NA values
# (single select() with a column vector instead of thirteen chained calls;
# result and column order are identical)
pxrf_final <- pxrf_all %>%
  select(-c(Al2O3, CaO, SiO2, Cl, K2O, Ni, P2O5, S, V, As, Nb, Cr, Ba))
# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0
# Final analysis data set
colnames(pxrf_final)
## [1] "Area" "Type" "MgO" "Ti" "Mn" "Fe" "Cu" "Zn" "Rb" "Sr"
## [11] "Y" "Zr"
# PC autoplot with K-means clusters
# (seed fixed so the k-means cluster assignment is reproducible)
set.seed(1)
autoplot(kmeans(pxrf_final[3:12], 4), data = pxrf_final, label = TRUE, label.size = 3)
PCA with final elements:
# Elements
colnames(pxrf_final[3:12])
## [1] "MgO" "Ti" "Mn" "Fe" "Cu" "Zn" "Rb" "Sr" "Y" "Zr"
# PCA analysis
# (data was already z-scored above, so no further scaling inside prcomp)
pca_1 <- prcomp(pxrf_final[3:12])
summary(pca_1)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.6820 1.1972 0.84041 0.53728 0.45231 0.32524 0.2122
## Proportion of Variance 0.7193 0.1433 0.07063 0.02887 0.02046 0.01058 0.0045
## Cumulative Proportion 0.7193 0.8627 0.93328 0.96215 0.98261 0.99318 0.9977
## PC8 PC9 PC10
## Standard deviation 0.14003 0.05947 0
## Proportion of Variance 0.00196 0.00035 0
## Cumulative Proportion 0.99965 1.00000 1
# PCA plots
# Base-R biplot: observations in grey, element loadings in pink
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Artaxata elements")
# Same scores with labels, colored by excavation area
autoplot(pca_1, data=pxrf_final, colour='Area', shape = FALSE, label = TRUE, main = "PCA Artaxata grouped by area")
# FactoMineR PCA for its default factor maps and result-object summary
PCA(pxrf_final[3:12])
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 10 individuals, described by 10 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
# HCA dendrogram, samples color coded by area:
# Ward (ward.D2) clustering on Euclidean distances, k = 5 colored clusters.
# NOTE(review): dist() receives the whole pxrf_final table including the
# non-numeric Area/Type columns — confirm only the element columns should
# drive the distances (cf. pxrf_final[3:12] used for the PCA above).
dend <-
pxrf_final %>% # data
dist %>% # calculate a distance matrix
hclust(method = "ward.D2") %>% # hierarchical clustering
as.dendrogram %>% # turn the object into a dendrogram
color_branches(k=5)
# One HCL rainbow color per area (column 1)
area <- as.factor(pxrf_final[, 1])
n_area <- length(unique(area))
cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l = 50)
col_area <- cols_a[area]
# Label colors must follow the leaf order of the dendrogram
labels_colors(dend) <- col_area[order.dendrogram(dend)]
plot(dend, main="HCA with sample areas")
rect.dendrogram(dend, 5, border = "Black")
legend("topright", legend = levels(area), fill = cols_a)
# Libraries
library(dplyr);
library(openxlsx);
library(ggtern)
# Read and filter data
# NOTE(review): openxlsx::read.xlsx has no `sep` argument — presumably
# `sep.names = ";"` was intended; verify it is not silently ignored.
grain <- read.xlsx("data/grain_AA.xlsx", sep=";")
# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>%
filter(!grepl('Average', Sample)) %>%
filter(!grepl('test', Sample))
# Average data by "Sample", "Context" and "Type" columns from the original dataset
# aggregate() stores the grouping keys in new columns Group.1/Group.2/Group.3
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)
# Remove the now empty columns for clarity
# (the original character columns were destroyed by FUN = mean)
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)
# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
grain <- grain %>% select(-Group.1)
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"
# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
# Min-max rescale a numeric vector to [0, 1].
#   x     - numeric vector
#   na.rm - forwarded to min()/max(); the default TRUE reproduces the old
#           behavior. FIX: the original body hardcoded na.rm=TRUE and
#           silently ignored this argument; it is now honored.
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=na.rm))/(max(x, na.rm=na.rm)-min(x, na.rm=na.rm))}
# Apply function "normalize" to clay, silt and sand columns
# NOTE(review): columns 4:6 are assumed to be Clay/Silt/Sand — verify.
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))
# Ternary plots
# All samples, labeled by sample name, colored by context
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
labs(title="Clay silt sand") +
theme_rgbw() +
theme_nomask() +
geom_point(size=0) +
geom_text(aes(label=rownames(grain_01)), size=3)
We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.
# Libraries
library(openxlsx);
library(dplyr);
library(ggplot2);
library(ggrepel);
library(tidyr);
library(reshape2) # Melt-function for reshaping data to long form
# Read the manual loss-on-ignition weighings and the TGA measurements.
# NOTE(review): openxlsx::read.xlsx has no `sep` argument — verify it is
# not silently ignored.
loi <- read.xlsx("data/loi_AA.xlsx", sep=";")
tga <- read.xlsx("data/tga_AA.xlsx", sep=";")
# Quick structural check (all TGA mass columns arrive as character and are
# converted to numeric further below)
summary(loi)
## sample crucible wet_weight sample_wet
## Length:10 Min. :8.066 Min. :10.70 Min. :1.923
## Class :character 1st Qu.:8.461 1st Qu.:10.93 1st Qu.:2.326
## Mode :character Median :8.995 Median :11.48 Median :2.558
## Mean :8.892 Mean :11.39 Mean :2.496
## 3rd Qu.:9.174 3rd Qu.:11.75 3rd Qu.:2.701
## Max. :9.890 Max. :12.18 Max. :2.842
## dry_weight after_550_C after_950_C context
## Min. :10.58 Min. :10.47 Min. :10.31 Length:10
## 1st Qu.:10.85 1st Qu.:10.78 1st Qu.:10.60 Class :character
## Median :11.43 Median :11.33 Median :11.16 Mode :character
## Mean :11.32 Mean :11.22 Mean :11.07
## 3rd Qu.:11.69 3rd Qu.:11.58 3rd Qu.:11.42
## Max. :12.13 Max. :12.06 Max. :11.94
## type
## Length:10
## Class :character
## Mode :character
##
##
##
summary(tga)
## Name Analysis;Date Batch Wet
## Length:12 Length:12 Length:12 Length:12
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## Dry Mass_550 Mass_950 context
## Length:12 Length:12 Length:12 Length:12
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## type
## Length:12
## Class :character
## Mode :character
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))
# TGA character columns to numeric
# NOTE(review): columns 4:7 are assumed to be Wet/Dry/Mass_550/Mass_950 — verify.
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)
# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name
# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)
# LOI percentages: c550 = organic fraction relative to dry mass;
# c950 = carbonate fraction relative to the post-550 C mass
loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100
# Same with TGA
# (TGA reports sample masses directly, so no crucible subtraction is needed)
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100
# Colored boxplots in same graph
# Keep context (col 8) and the two LOI percentages (cols 13:14 = c550, c950)
loi_long <- loi[c(8,13:14)]
loi_long <- melt(loi_long, id = "context") # Reshaping data to long form
# LOI distribution per temperature step, one box per context
ggplot(loi_long, aes(x = variable, y = value, color = context)) +
geom_boxplot() +
labs(title="Traditional LOI by temperature",
x ="Temperature", y = "LOI")
# Same data transposed: one box per temperature step, grouped by context
ggplot(loi_long, aes(x = context, y = value, color = variable)) +
geom_boxplot() +
labs(title="Traditional LOI by context",
x ="Context", y = "LOI")
# Color by context
# Organic LOI (x) vs carbonate LOI (y); shape encodes the sample type
ggplot(loi,
aes(c550, c950, label = rownames(loi), colour = factor(context))) +
geom_point(size=2, aes(shape = factor(type))) +
geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
xlab('Organic LOI (%)') +
ylab('Carbonate LOI (%)') +
theme(axis.title = element_text())
# Stacked barplot of the two LOI fractions per sample, samples ordered by LOI
loi_long <- gather(loi, key = var, value = value, c550, c950)
ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
  geom_col(position = 'stack') +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# Colored boxplots in same graph
# Keep context (col 8) and the computed LOI columns (10:11 = c550, c950)
tga_long <- tga[c(8,10:11)]
tga_long <- melt(tga_long, id = "context") # Reshaping data to long form
ggplot(tga_long, aes(x = variable, y = value, color = context)) +
geom_boxplot() +
labs(title="Thermogravimeter LOI by temperature",
x ="Temperature", y = "LOI")
ggplot(tga_long, aes(x = context, y = value, color = variable)) +
geom_boxplot() +
labs(title="Thermogravimeter LOI by context",
x ="Context", y = "LOI")
# Color by context biplot
# Organic LOI (x) vs carbonate LOI (y) from the TGA, colored by context
ggplot(tga,
aes(c550, c950, label = rownames(tga), colour = factor(context))) +
geom_point(size=2) +
geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
xlab('Organic LOI (%)') +
ylab('Carbonate LOI (%)') +
theme(axis.title = element_text())
# Stacked barplot of the two TGA LOI fractions per sample, ordered by LOI
tga_long <- gather(tga, key = var, value = value, c550, c950)
ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
  geom_col(position = 'stack') +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# Libraries
library(dplyr);
library(ggplot2);
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)
# Colorimeter export: tab-separated, decimal comma
color1 <- read.table("data/color_AA.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
# Keep only the three measurement columns (L, a, b); the subset() wrapper
# is a no-op around plain indexing
color <- subset(color1[, 3:5])
summary(color)
## L a b
## Min. :51.37 Min. :3.880 Min. :15.92
## 1st Qu.:57.73 1st Qu.:4.402 1st Qu.:16.49
## Median :58.79 Median :4.555 Median :16.93
## Mean :58.18 Mean :5.067 Mean :17.32
## 3rd Qu.:59.58 3rd Qu.:4.695 3rd Qu.:17.32
## Max. :61.57 Max. :8.670 Max. :21.47
# a vs b scatter with repelled sample labels
ggplot(color,
aes(a, b, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Color (a*b)") +
xlab("a D65") +
ylab("b D65")
# Luminance against sample number (column X of the raw export)
ggplot(color1,
aes(X, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Luminance") +
ylab("L* (D65)") +
xlab("Sample number")
# Luminance against a
ggplot(color,
aes(a, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "A versus luminance") +
ylab("L* (D65)") +
xlab("a")
# Luminance against b
ggplot(color,
aes(b, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "B versus luminance") +
ylab("L* (D65)") +
xlab("b")
# Colorful 3D plot with drop lines
# (type = "h" draws a drop line from each point to the base plane)
scatter3D(color$a, color$b, color$L,
xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$a, color$b, color$L, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)
# Same cloud with the axes rotated (L first)
scatter3D(color$L, color$a, color$b,
xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$L, color$a, color$b, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)